Initial work in mapping EDT and Waze incident data

Started 2017-11-28

Ideas:

# Read in libraries and set working directory
knitr::opts_chunk$set(echo = T, warning=F, message=F)
options(width = 2400, stringsAsFactors = F)

library(ggplot2)
library(tidyverse)
library(maps) # for mapping base layers
library(reshape)
library(DT) # for datatable
# devtools::install_github("ropensci/plotly") # for latest version
library(plotly) # do after ggplot2
library(sp)
library(maptools) # readShapePoly() and others
library(mapproj) # for coord_map()
library(rgdal) # for readOGR(), needed for reading in ArcM shapefiles
library(rgeos) # for gIntersection, to clip two shapefiles
library(raster)

wazedir <- "W:/SDI Pilot Projects/Waze/Working Documents"
knitr::opts_knit$set(root.dir = wazedir) 
# Initial run: Start with the projected EDT and Waze data that Lia has prepared. It appears this is just the one week of August data for MD; use aa pilot, expand to other periods after. Clean these data and save as .RData. If already done, load the processed data.

if(length(grep('EDT_Waze.RData', dir())) < 1){
  waz.shp <- readOGR(dsn = ".", "Waze_sample_projected")
  edt.shp <- readOGR(dsn = ".", "EDT_sample_projected")
  
  # Fix time values: convert pub_millis to usable POSIX date class
  waz.shp$time <- as.POSIXct(waz.shp$pub_millis/1000, origin = "1970-01-01", tz="America/New_York")
  # Convert crashdate and time in EDT to POSIX
  edt.time <- paste(as.character(edt.shp$CrashDate), " ",
                    edt.shp$HourofDay, ":",
                    formatC(as.numeric(as.character(edt.shp$MinuteofDa)), width = 2, flag = "0"), sep = "")
  edt.shp$time <- strptime(edt.time, format = "%Y/%m/%d %H:%M", tz="America/New_York")

  # Read in county data from Census
  counties <- readOGR("CensusCounty/.")
  md_counties <- counties[counties$STATEFP == 24,]

  # Save imported waze and EDT files as .RData for faster import after the first time
  save(file = "EDT_Waze.RData", list = c("waz.shp", "edt.shp", "md_counties"))
} else { load('EDT_Waze.RData') }

Initial mapping work

# Make a MD map
mdpoly <- map('state', 'Maryland')

# all points... 
points(waz.shp$longitude, waz.shp$latitude,
       pch = 16, 
       cex = 0.5,
       col = alpha('grey20', 0.1))

# all points... 
points(edt.shp$GPSLong_Ne, edt.shp$GPSLat,
       pch = 21, 
       cex = 0.5,
       col = alpha('midnightblue', 0.2))
counties <- map_data("county", "maryland")

waz.shp$tooltiptext <- with(waz.shp@data, paste(city, alert_type, time, sep = "\n"))
waz.shp$tooltiptext <- sub("NA", "", waz.shp$tooltiptext)

edt.shp$tooltiptext <- with(edt.shp@data, paste(County, HighestInj, time, sep = "\n"))
edt.shp$tooltiptext <- sub("NA", "", edt.shp$tooltiptext)


map.p <- ggplot() +
  ggtitle("2017-08-01 Spatial Overlay") +
  geom_polygon(data = counties, aes(x = long, y = lat, group = group), 
               color = "white", fill = "grey80") + theme_void() +
coord_fixed(1.3) +

  geom_point(data = waz.shp[format(waz.shp$time, "%d") == "01",]@data, # 2017-08-01 first
             aes(
               x = longitude, 
               y = latitude, 
               color = alert_type,
                text = tooltiptext),
             shape = 16) +

  geom_point(data = edt.shp[format(edt.shp$time, "%d") == "01",]@data, # 2017-08-01 first
             aes(
               x = GPSLong_Ne, 
               y = GPSLat, 
               text = tooltiptext,
               color = 'EDT'),
             shape = 16) +
  scale_color_brewer(palette = "Set1",
                        name = "Waze Alert Type and EDT Crash")

  
ggplotly(map.p, tooltip = "text", width = 1000)

Interactive table of Waze events by day

# make a table of frequency of alert_type by day
freq.tab <- aggregate(uuid ~ format(time, "%d") + alert_type,
          FUN = length,
          data = waz.shp@data)

datatable(freq.tab,
          filter = 'top',
          colnames = c("Day" = 1, "Alert Type" = 2, "Count"= 3),
          rownames = F,
          options = list(dom = "ftip"))

Spatiotemporal join

# for each EDT event, find the number and distance for nearest neighbors in space and time from Waze data
ex <- edt.shp@data

distbreaks <- c(0, 0.1, 0.5, 1, 5, 10, 15, 25, 50, 100, 300, 1000)
distcounts <- vector()

for(i in 1:nrow(ex)){

  daterangemin <- ex[i, 'time'] - 2*60*60
  daterangemax <- ex[i, 'time'] + 2*60*60
    
  # find waze events within 3 hrs of this crash
  
  wx <- waz.shp[waz.shp$time > daterangemin & waz.shp$time < daterangemax,]
  
  if(nrow(wx) ==0) {
    b <- rep(0, length(distbreaks))
  } else {
  w_sp <- SpatialPoints(coords = data.frame(wx$longitude, wx$latitude))
  e_sp <- SpatialPoints(coords = data.frame(ex[i,'GPSLong_Ne'], ex[i,'GPSLat']))
  
  t1 <- spDists(w_sp, e_sp, longlat = T) # distance in km to each edt event
  
  # Sum of events with 0.1 to 100 mi in various chunks
  b <- hist(t1, breaks = distbreaks, plot = F)$counts
  }

  distcounts <- rbind(distcounts, b)
  
  # Sum of events within 1 to 120 minutes
  
  }

colnames(distcounts) = distbreaks[2:length(distbreaks)]
rownames(distcounts) = ex$ID

dm <- melt(distcounts)
colnames(dm) <- c("ID", "Bin", "Count")

gp <- ggplot(dm) + geom_histogram(aes(x = Count)) + facet_wrap(~Bin) + ylab("Frequency") + xlab("Count of Waze Events")

ggplotly(gp, width = 1000)